/**********************************************************************/
/*
   Author: Michelle Han
   Created: 30 September, 2022
   Description: Merges and reshapes PMO data transfers:
		7. JPAL_DATA_10: location for newly added observations in batch 1-22

	 Outputs data at person level.
   Note that this code works with PII data (not deidentified).

   Note: to set filepaths, run MASTER.do.

   Output:
	 pmo_b1-22_raw_add_vars_domicili_wide.dta

*/
/**********************************************************************/

	* Load the filepath globals unless $master_run is already "1"
	if "$master_run" != "1" {
		include "./Do/SET_FILEPATHS.do"
	}

	* Open a fresh text log; its name is prefixed with today's date (YYYYMMDD)
	capture log close
	local prefix : display %tdCYND date("`c(current_date)'", "DMY")
	log using "$KP_logs/`prefix'_PMO_merge_wide_add_vars_domicili_b1-22.txt", replace text

/*----------------------------------------------------*/
            /* Section: Append batches */
/*----------------------------------------------------*/

* Start from batch 1 and record which batch each observation came from
	use "$KP_deid_admin/Raw/JPAL_DATA_10/domisili_1.dta", clear
	gen batch = 1

* Append batches 2-22 directly from disk.
* Rows that were just appended have batch == . (all earlier rows are already
* filled in), so they are labelled immediately after each append. This avoids
* preserving/restoring the growing dataset on every iteration and avoids
* tempfiles whose macro names are the bare numbers 2-22, which would
* overwrite the positional-argument macros.
	qui forval i = 2 / 22 {
		noi di "Appending Batch `i'"
		append using "$KP_deid_admin/Raw/JPAL_DATA_10/domisili_`i'.dta"
		replace batch = `i' if missing(batch)
	}

* Check uniqueness: anon_id4 and batch must jointly identify observations
	gisid anon_id4 batch
	gsort anon_id4 batch

	/*----------------------------------------------------*/
	            /* Section: Check Data */
	/*----------------------------------------------------*/

* Flag people whose recorded location is not the same in every batch.
* Counts noted by the original author: 31,283 changed provinces and
* 64,758 changed cities (only the province variable is checked below).
	foreach loc of varlist anon_prov_id {
		* Within person, sort by the location value; the tag is 1 when the
		* smallest and largest sorted values differ
		bysort anon_id4 (`loc'): gen tag_`loc' = `loc'[1] != `loc'[_N]
		tabulate tag_`loc'
	}

	* The tags are only needed for the tabulation above
	drop tag_*

* Latest batch in which each person appears; used later to keep the most
* recent location history
	sort anon_id4
	gegen last_apply_batch = max(batch), by(anon_id4)

	/*----------------------------------------------------*/
	            /* Section: Reshape */
	/*----------------------------------------------------*/

* Prep to reshape wide: keep only the person identifier, the batch index and
* the variables to be spread across batches, then shrink storage types
* (compress now runs after keep so it does not process dropped variables)
	local varlist anon_prov_id last_apply_batch java urban
	keep `varlist' anon_id4 batch
	compress

* Reshape wide: one row per anon_id4, one column per variable and batch
* (the macro reference previously contained a stray space inside the quotes)
	greshape wide `varlist', ///
		i(anon_id4) j(batch) nochecks benchmark

* Clean up data: keep most recent location history
* last_apply_batch was computed per person before the reshape, so every
* non-missing wide copy holds the same value; take the first non-missing one
	egen last_apply_batch = rowfirst(last_apply_batch*)
	* Drops the suffixed wide copies only; the new last_apply_batch is kept
	drop last_apply_batch?*

* Province as recorded in the person's most recent batch
	gen anon_prov_id = .

	qui forval i = 1 / 22 {
		replace anon_prov_id = anon_prov_id`i' if last_apply_batch == `i'
	}

	* Drops the per-batch province columns only; the new anon_prov_id is kept
	drop anon_prov_id?*

* Save raw merged reshaped data, but only if the dataset in memory reproduces
* the expected data signature
	datasignature
	* Copy the result right away so later commands cannot overwrite r()
	local observed_sig "`r(datasignature)'"
	local expected_sig "23157532:47(59746):4254419548:4046799751"

	if "`observed_sig'" == "`expected_sig'" {
		save "$KP_deid_admin/Clean/pmo_b1-22_raw_add_vars_domicili_wide.dta", replace
	}
	else {
		di as err "Careful, your machine produces a different dataset"
		di as err "  expected signature: `expected_sig'"
		di as err "  observed signature: `observed_sig'"
		* Close the log before aborting, then halt with an explicit non-zero
		* return code instead of relying on an unrecognized command
		cap log close
		exit 459
	}

	cap log close


// DONE
